This notebook explores clustering methods applied to the gene expression of a single sample from the dataset of interest, using an unbiased approach.
# Load libraries
library(magrittr)
library(scater)
library(readr)
library(bluster)
library(ggpubr)
library(pheatmap)
# Directory that holds the processed Gawad data
data_dir <- file.path("results", "Gawad_processed_data")

# Bring the project's custom clustering helper functions into scope
source(
  file.path("utils", "clustering-functions.R")
)

# Read the miQC-processed SCE object stored under SCPCS000216 / SCPCL000290
sample_290_normalized <- file.path(
  data_dir,
  "SCPCS000216",
  "SCPCL000290_miQC_processed_sce.rds"
) %>%
  read_rds()
# Perform k-means clustering over the parameter range 4 through 10 (step 1).
# The returned object replaces the input; the clustering results are read back
# from its colData below.
sample_290_normalized <- kmeans_clustering(
  sample_290_normalized,
  params_range = c(4:10),
  step_size = 1
)

# Grab the names of the colData columns that hold k-means results.
# The vector of column names is indexed directly instead of subsetting the
# DataFrame: `colData(x)[, i]` drops to a bare vector when exactly one column
# matches, which would make `colnames()` return NULL.
kmeans_cols <- grep("kmeans", colnames(colData(sample_290_normalized)))
kmeans_cluster_names <- colnames(colData(sample_290_normalized))[kmeans_cols]

# Plot the UMAP once per k-means result, colored by that result's clusters
kmeans_plot_list <- kmeans_cluster_names %>%
  purrr::map(
    ~ plotReducedDim(sample_290_normalized, dimred = "UMAP", colour_by = .x) +
      theme_bw() +
      theme(text = element_text(size = 22))
  )

# Arrange all k-means UMAPs in a 4-column grid
cowplot::plot_grid(plotlist = kmeans_plot_list, ncol = 4)
# Perform graph-based walktrap clustering (rank weighting) over the parameter
# range 5 through 25 in steps of 5. The returned object replaces the input;
# the clustering results are read back from its colData below.
sample_290_normalized <- graph_clustering(
  sample_290_normalized,
  params_range = c(5:25),
  step_size = 5,
  weighting_type = "rank",
  cluster_function = "walktrap"
)

# Grab the names of the colData columns that hold walktrap results.
# The vector of column names is indexed directly instead of subsetting the
# DataFrame: `colData(x)[, i]` drops to a bare vector when exactly one column
# matches, which would make `colnames()` return NULL.
walktrap_cols <- grep("walktrap", colnames(colData(sample_290_normalized)))
walktrap_cluster_names <-
  colnames(colData(sample_290_normalized))[walktrap_cols]

# Plot the UMAP once per walktrap result, colored by that result's clusters
walktrap_plot_list <- walktrap_cluster_names %>%
  purrr::map(
    ~ plotReducedDim(sample_290_normalized, dimred = "UMAP", colour_by = .x) +
      theme_bw() +
      theme(text = element_text(size = 22))
  )

# Arrange all walktrap UMAPs in a 3-column grid
cowplot::plot_grid(plotlist = walktrap_plot_list, ncol = 3)
# Perform graph-based louvain clustering (jaccard weighting) over the parameter
# range 5 through 25 in steps of 5. The returned object replaces the input;
# the clustering results are read back from its colData below.
sample_290_normalized <- graph_clustering(
  sample_290_normalized,
  params_range = c(5:25),
  step_size = 5,
  weighting_type = "jaccard",
  cluster_function = "louvain"
)

# Grab the names of the colData columns that hold louvain results.
# The pattern must be "louvain": matching "walktrap" here would silently
# re-plot the walktrap results under the louvain name.
# The vector of column names is indexed directly instead of subsetting the
# DataFrame: `colData(x)[, i]` drops to a bare vector when exactly one column
# matches, which would make `colnames()` return NULL.
louvain_cols <- grep("louvain", colnames(colData(sample_290_normalized)))
louvain_cluster_names <- colnames(colData(sample_290_normalized))[louvain_cols]

# Plot the UMAP once per louvain result, colored by that result's clusters
louvain_plot_list <- louvain_cluster_names %>%
  purrr::map(
    ~ plotReducedDim(sample_290_normalized, dimred = "UMAP", colour_by = .x) +
      theme_bw() +
      theme(text = element_text(size = 22))
  )

# Arrange all louvain UMAPs in a 3-column grid
cowplot::plot_grid(plotlist = louvain_plot_list, ncol = 3)
# Compute the k-means cluster validity stats for each clustering result and
# collect them in a data frame
kmeans_stats_df <- create_metadata_stats_df(
  sample_290_normalized,
  4:10,
  1,
  "kmeans"
)
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
# Peek at the first rows of the k-means stats
kmeans_stats_df %>%
  head()

# Collapse the stats into a summary data frame
kmeans_summary_stats_df <- kmeans_stats_df %>%
  summarize_clustering_stats()
`summarise()` has grouped output by 'cluster_names_column', 'cluster_type'. You can override using the `.groups` argument.
# Peek at the first rows of the k-means summary
kmeans_summary_stats_df %>%
  head()

# Purity stats for the individual k-means clusters
kmeans_purity_plots <- kmeans_stats_df %>%
  plot_cluster_purity()
kmeans_purity_plots

# Silhouette width stats for the individual k-means clusters
kmeans_silhouette_plots <- kmeans_stats_df %>%
  plot_cluster_silhouette_width()
kmeans_silhouette_plots
# Compute the walktrap cluster validity stats for each clustering result and
# collect them in a data frame
walktrap_stats_df <- create_metadata_stats_df(
  sample_290_normalized,
  5:25,
  5,
  "walktrap"
)
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
# Peek at the first rows of the full walktrap stats
walktrap_stats_df %>%
  head()

# Collapse the stats into a summary data frame
walktrap_summary_stats_df <- walktrap_stats_df %>%
  summarize_clustering_stats()
`summarise()` has grouped output by 'cluster_names_column', 'cluster_type'. You can override using the `.groups` argument.
# Peek at the first rows of the walktrap summary
walktrap_summary_stats_df %>%
  head()

# Purity stats for the individual walktrap clusters
walktrap_purity_plots <- walktrap_stats_df %>%
  plot_cluster_purity()
walktrap_purity_plots

# Silhouette width stats for the individual walktrap clusters
walktrap_silhouette_plots <- walktrap_stats_df %>%
  plot_cluster_silhouette_width()
walktrap_silhouette_plots
# Compute the louvain cluster validity stats for each clustering result and
# collect them in a data frame
louvain_stats_df <- create_metadata_stats_df(
  sample_290_normalized,
  5:25,
  5,
  "louvain"
)
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
Joining, by = "cell_barcode"
# Peek at the first rows of the louvain stats
louvain_stats_df %>%
  head()

# Collapse the stats into a summary data frame
louvain_summary_stats_df <- louvain_stats_df %>%
  summarize_clustering_stats()
`summarise()` has grouped output by 'cluster_names_column', 'cluster_type'. You can override using the `.groups` argument.
# Peek at the first rows of the louvain summary
louvain_summary_stats_df %>%
  head()

# Purity stats for the individual louvain clusters
louvain_purity_plots <- louvain_stats_df %>%
  plot_cluster_purity()
louvain_purity_plots

# Silhouette width stats for the individual louvain clusters
louvain_silhouette_plots <- louvain_stats_df %>%
  plot_cluster_silhouette_width()
louvain_silhouette_plots
# Gather the summary stats of the two graph-based methods (walktrap and
# louvain) into a named list; the k-means summary is not part of this list
summary_stats_df_list <- list(
  walktrap = walktrap_summary_stats_df,
  louvain = louvain_summary_stats_df
)

# Purity summary plot
summary_stats_df_list %>%
  plot_avg_validity_stats("avg_purity")

# Silhouette width summary plot
summary_stats_df_list %>%
  plot_avg_validity_stats("avg_width")
# Assess cluster stability for the k-means results
kmeans_ari_df <- get_cluster_stability_summary(
  sample_290_normalized,
  4:10,
  1,
  "kmeans"
)
kmeans_ari_df

# Plot the cluster stability ARI values
kmeans_ari_df %>%
  plot_cluster_stability_ari()
Warning: Continuous limits supplied to discrete scale.
Did you mean `limits = factor(...)` or `scale_*_continuous()`?
# Assess cluster stability for the walktrap results
walktrap_ari_df <- get_cluster_stability_summary(
  sample_290_normalized,
  5:25,
  5,
  cluster_type = "walktrap",
  cluster_function = "walktrap"
)
walktrap_ari_df

# Plot the cluster stability ARI values
walktrap_ari_df %>%
  plot_cluster_stability_ari()
Warning: Continuous limits supplied to discrete scale.
Did you mean `limits = factor(...)` or `scale_*_continuous()`?
# Assess cluster stability for the louvain results
louvain_ari_df <- get_cluster_stability_summary(
  sample_290_normalized,
  5:25,
  5,
  cluster_type = "louvain",
  cluster_function = "louvain"
)
louvain_ari_df

# Plot the cluster stability ARI values
louvain_ari_df %>%
  plot_cluster_stability_ari()
Warning: Continuous limits supplied to discrete scale.
Did you mean `limits = factor(...)` or `scale_*_continuous()`?
# Gather the stability results of the two graph-based methods (walktrap and
# louvain) into a named list; the k-means results are not part of this list
summary_ari_df_list <- list(
  walktrap = walktrap_ari_df,
  louvain = louvain_ari_df
)

# Summary plot of the ARI values
summary_ari_df_list %>%
  plot_summary_cluster_stability_ari()
# Record the R version and package versions used for this run
utils::sessionInfo()
R version 4.1.2 (2021-11-01)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.3 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
locale:
[1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8 LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8
[6] LC_MESSAGES=C.UTF-8 LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] pheatmap_1.0.12 ggpubr_0.4.0 bluster_1.4.0 readr_2.1.1
[5] scater_1.22.0 ggplot2_3.3.5 scuttle_1.4.0 SingleCellExperiment_1.16.0
[9] SummarizedExperiment_1.24.0 Biobase_2.54.0 GenomicRanges_1.46.1 GenomeInfoDb_1.30.0
[13] IRanges_2.28.0 S4Vectors_0.32.3 BiocGenerics_0.40.0 MatrixGenerics_1.6.0
[17] matrixStats_0.61.0 magrittr_2.0.1
loaded via a namespace (and not attached):
[1] ggbeeswarm_0.6.0 colorspace_2.0-2 ggsignif_0.6.3 ellipsis_0.3.2
[5] modeltools_0.2-23 XVector_0.34.0 BiocNeighbors_1.12.0 rstudioapi_0.13
[9] farver_2.1.0 ggrepel_0.9.1 flexmix_2.3-17 fansi_0.5.0
[13] splines_4.1.2 sparseMatrixStats_1.6.0 knitr_1.36 polyclip_1.10-0
[17] jsonlite_1.7.2 broom_0.7.10 cluster_2.1.3 ggforce_0.3.3
[21] pdfCluster_1.0-3 compiler_4.1.2 backports_1.4.0 assertthat_0.2.1
[25] Matrix_1.4-1 fastmap_1.1.0 cli_3.1.0 tweenr_1.0.2
[29] BiocSingular_1.10.0 miQC_1.2.0 htmltools_0.5.2 tools_4.1.2
[33] rsvd_1.0.5 igraph_1.2.9 gtable_0.3.0 glue_1.5.1
[37] GenomeInfoDbData_1.2.7 dplyr_1.0.7 Rcpp_1.0.7 carData_3.0-4
[41] jquerylib_0.1.4 vctrs_0.3.8 DelayedMatrixStats_1.16.0 stringr_1.4.0
[45] xfun_0.28 beachmat_2.10.0 lifecycle_1.0.1 irlba_2.3.3
[49] renv_0.14.0 rstatix_0.7.0 zlibbioc_1.40.0 MASS_7.3-57
[53] scales_1.1.1 hms_1.1.1 parallel_4.1.2 RColorBrewer_1.1-2
[57] yaml_2.2.1 gridExtra_2.3 sass_0.4.0 stringi_1.7.6
[61] ScaledMatrix_1.2.0 BiocParallel_1.28.2 geometry_0.4.6 rlang_0.4.12
[65] pkgconfig_2.0.3 bitops_1.0-7 evaluate_0.14 lattice_0.20-45
[69] purrr_0.3.4 labeling_0.4.2 cowplot_1.1.1 tidyselect_1.1.1
[73] R6_2.5.1 generics_0.1.1 DelayedArray_0.20.0 DBI_1.1.1
[77] pillar_1.6.4 withr_2.4.3 abind_1.4-5 RCurl_1.98-1.5
[81] nnet_7.3-17 tibble_3.1.6 crayon_1.4.2 car_3.0-12
[85] utf8_1.2.2 tzdb_0.2.0 rmarkdown_2.11 viridis_0.6.2
[89] grid_4.1.2 digest_0.6.29 tidyr_1.1.4 munsell_0.5.0
[93] beeswarm_0.4.0 viridisLite_0.4.0 vipor_0.4.5 bslib_0.3.1
[97] magic_1.6-0